#Nature's Kidneys: the Role of Wetland Reserve Easements in Restoring Water Quality
#Nicole Karwowski and Marin Skidmore
#Replication File
#2025

#Clean and merge animal unit data
#load libraries
library(ggplot2)
library(exactextractr)
library(data.table)
library(tidyverse)
library(Hmisc)
library(haven)
library(stargazer)
library(purrr)
library(tidyr)
library(collapse)
library(ggthemes)
library(mapview)
options(scipen=999)
library(stringr)
library(data.table)
library(usdarnass)

#open animal data
setwd("C:/Users/16308/OneDrive - Montana State University/UW_Madison/Water quality/Replication Package")

animal_totals_imputed <- read_dta("Data/NassAnimalUnits/Raw/animal_totals_imputed.dta")
colnames(animal_totals_imputed)
summary(animal_totals_imputed)

animal_totals_imputed$year<-as.character(animal_totals_imputed$year) 


#look at missing animal totals
missing<-subset(animal_totals_imputed, is.na(animal_totals_imputed$animals_weighted))

#open huc12 and county linkage file 
load("Data/CountyBoundaries/Processed/Huc12County.RData")

huc12<-subset(merge, select=c(huc12))
huc12<-distinct(huc12)

#there are 20,870 hucs in my sample. 
#create panel from 1997 to 2017. 
n<-1997:2017
df<-do.call("rbind", replicate(20870, n, simplify=FALSE))
df<-as.data.frame(df)
summary(df)

#bind with huc 12s. 
huc12<-cbind(huc12, df)

#reshape
#from wide to long
huc12w<- huc12 %>% pivot_longer(cols=2:22,
                                values_to="Year")

huc12w<-subset(huc12w, select=c(huc12, Year))

table(huc12w$Year)

rm(df)
rm(huc12)
rm(n)
rm(merge)

#merge huc12 panel with key that connects to county
#a huc 12 can connect to muliple counties. 
#each row represents a unique huc12-county-year combo
a<-merge(huc12w, key, by.x="huc12", by.y="huc12", all.x=TRUE, all.y=FALSE)
a<-a[order(a$huc12, a$Year, -a$intarea),]

a$Year<-as.character(a$Year)
a$FipsYear<-paste0(a$fips, a$Year, sep="")
animal_totals_imputed$FipsYear<-paste0(animal_totals_imputed$county_code, animal_totals_imputed$year, sep="")

#now merge with the animal units data
b<-merge(a, animal_totals_imputed, by.x="FipsYear", by.y="FipsYear", all.x=TRUE, all.y=FALSE)
a<-a[order(a$huc12, a$Year, -a$intarea),]

b<-b[order(b$huc12, b$Year, -b$intarea),]

#create var for animal units in each huc12 area. 
b$animalshuc<-b$animals_weighted*b$prop
summary(b$animalshuc)

na<-subset(b, is.na(b$animalshuc))
na_counties<-subset(na, select=c(fips, county.x, county.y, state, animals_weighted))
na_counties<-distinct(na_counties)

#about 3,000 obs have NA for animal units
#are these true zeroes? should we impute the average instead? 

#look like GALAX, VA is zero. small urban ceter. 
#RADFORD, VA is zero. urban. 
#mcdowell and mingo are forested urban counties in WV
#Issaquena is very small MS counties
#i am seeing a trend that these are very small countes, often in urban and forested areas
#assume the animal units there are 0. 

b$animalshuc<-ifelse(is.na(b$animalshuc), 0, b$animalshuc)

#now collapse by huc12 and year. 
#take sum of the animal units
c<-subset(b, select=c(huc12, Year, countn, prop, animalshuc))

d<- c %>% group_by(huc12, Year) %>% mutate(countn=first(countn),
                                           prop=sum(prop),
                                           animalshuc=sum(animalshuc))
d<-distinct(d)

#Save it and bring to merge file
e<-subset(d, select=c(huc12, Year, animalshuc))
colnames(e)<-c("huc12", "Year", "AnimalUnits")

animals_huc12<-e
save(animals_huc12, file="Data/NassAnimalUnits/Processed/AnimalUnits_hucyear.RData")

